# ██████╗ ██╗ ██╗██╗ ██╗██╗ ██████╗ ██████╗ ██╗ ██╗███████╗██████╗ ███████╗
# ██╔══██╗██║ ██║╚██╗ ██╔╝██║ ██╔═══██╗██╔══██╗██║ ██║██╔════╝██╔══██╗██╔════╝
# ██████╔╝███████║ ╚████╔╝ ██║ ██║ ██║██████╔╝███████║█████╗ ██████╔╝█████╗
# ██╔═══╝ ██╔══██║ ╚██╔╝ ██║ ██║ ██║██╔═══╝ ██╔══██║██╔══╝ ██╔══██╗██╔══╝
# ██║ ██║ ██║ ██║ ███████╗╚██████╔╝██║ ██║ ██║███████╗██║ ██║███████╗
# ╚═╝ ╚═╝ ╚═╝ ╚═╝ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═╝╚══════╝╚═╝ ╚═╝╚══════╝
## PHYLOPHERE: A Nextflow pipeline including a complete set
## of phylogenetic comparative tools and analyses for Phenome-Genome studies
### Github: https://github.com/nozerorma/caastools/nf-phylophere
### Author: Miguel Ramon (miguel.ramon@upf.edu)
#### File: CI-composition.Rmd
This script computes 95% credibility intervals (CIs) for the trait of interest:
This section configures the working environment, sets directories, and loads necessary functions and libraries.
# Call the setup function from commons.R
source("./obj/commons.R")
## [DEBUG] args = cancer_traits_processed-LQ.csv | science.abn7829_data_s4.nex.tree | data_exploration | 1998 | primates | family | neoplasia_prevalence | adult_necropsy_count | neoplasia_necropsy | /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv | malignant_prevalence | LQ
## [DEBUG] seed_val = 1998
## [DEBUG] workingDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/dc/93da68129eef643ee49a40481df3df
## [DEBUG] objDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/dc/93da68129eef643ee49a40481df3df/obj
## [DEBUG] resultsDir = data_exploration
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## [1] "Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/dc/93da68129eef643ee49a40481df3df"
## [1] "Results Directory: data_exploration"
## [DEBUG] Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/dc/93da68129eef643ee49a40481df3df
## [DEBUG] Results Directory: data_exploration
## [DEBUG] data_exploration_dir = data_exploration/1.Data-exploration
## [DEBUG] species_distribution_dir = data_exploration/1.Data-exploration/1.Species_distribution
## [DEBUG] extreme_plots_dir = data_exploration/1.Data-exploration/2.Extreme_plots
## [DEBUG] phylo_distribution_dir = data_exploration/1.Data-exploration/3.Phylogenetic_distribution
## [DEBUG] ci_dir = data_exploration/1.Data-exploration/4.CI_overlaps
## [DEBUG] palettes loaded: primates=15, primates_dark=15, mammals=17
## [1] "Loading trait data from: cancer_traits_processed-LQ.csv"
## [DEBUG] trait_path = cancer_traits_processed-LQ.csv
## [DEBUG] trait_df rows = 47, cols = 19
## [DEBUG] trait_df columns: tax_id, species, common_name, family, adult_necropsy_count, neoplasia_necropsy, neoplasia_prevalence, benign_count, benign_prevalence, malignant_count, malignant_prevalence, body_mass, mass_g, log_body_mass, mature_f, mature_m, mature_age, MLS.anage, LQ
## [DEBUG] trait_df species unique = 47
## [DEBUG] tree_path = science.abn7829_data_s4.nex.tree
## [DEBUG] tree tips = 236, nodes = 235
## [DEBUG] clade_name = primates
## [DEBUG] taxon_of_interest = family
## [DEBUG] trait = neoplasia_prevalence
## [DEBUG] n_trait = neoplasia_necropsy
## [DEBUG] p_trait = adult_necropsy_count
## [DEBUG] has.p = TRUE, p missing = 0
## [DEBUG] has.n = TRUE, n missing = 0
## Warning: package 'ape' was built under R version 4.4.2
##
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
##
## where
## [DEBUG] tax_id_file = /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv
## [DEBUG] tax_id_df rows = 1290, distinct taxa = 807
## [DEBUG] has.TAX_ID = TRUE
## [DEBUG] trait_df merged with tax_id_df: rows = 47, missing tax_id = 0
## [DEBUG] normalized tax_id from merged columns, missing tax_id = 0
## [DEBUG] tree_ids rows = 40, missing tax_id = 0
## [DEBUG] common_tax_ids = 40
## [DEBUG] pruned_tree tips (TAX_ID) = 40, nodes = 39
## [DEBUG] trait_df after TAX_ID tree filter rows = 40
## [DEBUG] secondary_trait = malignant_prevalence
## [DEBUG] has.secondary = TRUE, missing = 0
## [DEBUG] branch_trait = LQ
## [DEBUG] has.branch = TRUE, missing = 0
setup_rmd()
# Debug helper (prints into HTML output)
if (is.null(getOption("phylo_debug"))) {
options(phylo_debug = TRUE)
}
if (!exists("phylo_debug_log", envir = .GlobalEnv)) {
phylo_debug_log <- character()
}
if (!exists("debug_log", inherits = TRUE)) {
debug_log <- function(...) {
msg <- sprintf(...)
phylo_debug_log <<- c(phylo_debug_log, msg)
cat("[DEBUG] ", msg, "\n", sep = "")
}
}
# Load additional libraries
library(dplyr)
library(tidyr)
library(tibble)
library(ggplot2)
library(patchwork)
library(ggtext)
color_palette <- paste0(clade_name, "_palette") # Color palette for the clade
capitalized_taxon <- tools::toTitleCase(taxon_of_interest) # Capitalized taxon name
capitalized_trait <- tools::toTitleCase(gsub("_", " ", trait)) # Capitalized and deconvoluted trait name
capitalized_clade <- tools::toTitleCase(gsub("_", " ", clade_name)) # Capitalized and deconvoluted clade name
createDir(ci_dir)
## [DEBUG] createDir: created data_exploration/1.Data-exploration/4.CI_overlaps
This section calculates 95% confidence intervals for malignant and neoplasia prevalence per species using both Wilson and Jeffreys methods.
# Compute CIs directly using DescTools and binom packages
trait_ci <- trait_df %>%
group_by(species) %>%
dplyr::rename(n_population = .data[[p_trait]],
n_observation = .data[[n_trait]],
trait = .data[[trait]]) %>%
dplyr::mutate(
# Jeffreys method CIs
trait_ci_lb = binom::binom.confint(n_observation, n_population,
method = "bayes", priors = c(0.5, 0.5),
conf.level = 0.95)[, 5],
trait_ci_ub = binom::binom.confint(n_observation, n_population,
method = "bayes", priors = c(0.5, 0.5),
conf.level = 0.95)[, 6],
trait_ci = paste0("[", round(trait_ci_lb, 2), "-", round(trait_ci_ub, 2), "]")
) %>%
ungroup()
## Warning: Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.
## ℹ Please use `all_of(var)` (or `any_of(var)`) instead of `.data[[var]]`
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Save results
write.csv(trait_ci, file.path(ci_dir, "trait_CI.csv"),
quote = FALSE, row.names = FALSE)
# Display preview
head(trait_ci)
## # A tibble: 6 × 22
## species common_name family n_population n_observation trait benign_count
## <chr> <chr> <chr> <int> <int> <dbl> <int>
## 1 Alouatta_ca… Black howl… Ateli… 32 4 0.125 3
## 2 Ateles_geof… Black-hand… Ateli… 43 13 0.302 5
## 3 Callimico_g… Goeldi's m… Cebid… 69 15 0.217 8
## 4 Callithrix_… White-fron… Cebid… 31 3 0.0968 2
## 5 Callithrix_… Common mar… Cebid… 41 1 0.0244 0
## 6 Cebuella_py… Pygmy marm… Cebid… 41 4 0.0976 2
## # ℹ 15 more variables: benign_prevalence <dbl>, malignant_count <int>,
## # malignant_prevalence <dbl>, body_mass <dbl>, mass_g <dbl>,
## # log_body_mass <dbl>, mature_f <dbl>, mature_m <dbl>, mature_age <dbl>,
## # MLS.anage <dbl>, LQ <dbl>, tax_id <int>, trait_ci_lb <dbl>,
## # trait_ci_ub <dbl>, trait_ci <chr>
This section prepares data for comparing cancer prevalence between all pairs of species.
# Helper function to create pairwise comparisons for a given method
create_pairwise_data <- function(ci_data) {
# Define column names for lower and upper bounds
ci_subset <- ci_data %>%
dplyr::select(species, n_population,
n_observation, trait,
trait_ci_lb, trait_ci_ub)
# Generate all pairwise species combinations
pairwise <- expand.grid(species1 = ci_subset$species,
species2 = ci_subset$species,
stringsAsFactors = FALSE)
# Merge species data for each pair
pairwise <- pairwise %>%
left_join(ci_subset %>% rename_with(~paste0(.x, "1"), .cols = -species),
by = c("species1" = "species")) %>%
left_join(ci_subset %>% rename_with(~paste0(.x, "2"), .cols = -species),
by = c("species2" = "species"))
pairwise <- pairwise %>%
mutate(
trait_diff = trait1 - trait2,
trait_overlap = ci_overlap(
trait_ci_lb1, trait_ci_ub1, trait_ci_lb2, trait_ci_ub2
),
diff_value = trait_diff,
overlap_use = trait_overlap
)
return(pairwise)
}
# Create pairwise data
pairwise_data <- create_pairwise_data(trait_ci)
# Save results
write.csv(pairwise_data, file.path(ci_dir, "pairwise_data.csv"),
quote = FALSE, row.names = FALSE)
# Display preview
head(pairwise_data)
## species1 species2 n_population1 n_observation1 trait1
## 1 Alouatta_caraya Alouatta_caraya 32 4 0.12500000
## 2 Ateles_geoffroyi Alouatta_caraya 43 13 0.30232558
## 3 Callimico_goeldii Alouatta_caraya 69 15 0.21739130
## 4 Callithrix_geoffroyi Alouatta_caraya 31 3 0.09677419
## 5 Callithrix_jacchus Alouatta_caraya 41 1 0.02439024
## 6 Cebuella_pygmaea Alouatta_caraya 41 4 0.09756098
## trait_ci_lb1 trait_ci_ub1 n_population2 n_observation2 trait2 trait_ci_lb2
## 1 3.352372e-02 0.25257350 32 4 0.125 0.03352372
## 2 1.758696e-01 0.44262825 32 4 0.125 0.03352372
## 3 1.281707e-01 0.31916086 32 4 0.125 0.03352372
## 4 1.812308e-02 0.21626214 32 4 0.125 0.03352372
## 5 4.695278e-05 0.09147094 32 4 0.125 0.03352372
## 6 2.547171e-02 0.20034261 32 4 0.125 0.03352372
## trait_ci_ub2 trait_diff trait_overlap diff_value overlap_use
## 1 0.2525735 0.00000000 TRUE 0.00000000 TRUE
## 2 0.2525735 0.17732558 TRUE 0.17732558 TRUE
## 3 0.2525735 0.09239130 TRUE 0.09239130 TRUE
## 4 0.2525735 -0.02822581 TRUE -0.02822581 TRUE
## 5 0.2525735 -0.10060976 TRUE -0.10060976 TRUE
## 6 0.2525735 -0.02743902 TRUE -0.02743902 TRUE
This section transforms pairwise data into matrix format for visualization.
# Helper function to build difference matrix
build_diff_matrix <- function(pairwise_data) {
pairwise_data %>%
select(species1, species2, diff_value) %>%
pivot_wider(names_from = species2, values_from = diff_value) %>%
column_to_rownames("species1") %>%
as.matrix()
}
# Create matrices
matrix <- build_diff_matrix(pairwise_data)
# Save matrices
write.csv(matrix, file.path(ci_dir, "diff_matrix-CI.csv"),
quote = FALSE, row.names = TRUE)
# Display preview
head(matrix)
## Alouatta_caraya Ateles_geoffroyi Callimico_goeldii
## Alouatta_caraya 0.00000000 -0.17732558 -0.09239130
## Ateles_geoffroyi 0.17732558 0.00000000 0.08493428
## Callimico_goeldii 0.09239130 -0.08493428 0.00000000
## Callithrix_geoffroyi -0.02822581 -0.20555139 -0.12061711
## Callithrix_jacchus -0.10060976 -0.27793534 -0.19300106
## Cebuella_pygmaea -0.02743902 -0.20476461 -0.11983033
## Callithrix_geoffroyi Callithrix_jacchus Cebuella_pygmaea
## Alouatta_caraya 0.0282258065 0.10060976 0.0274390244
## Ateles_geoffroyi 0.2055513878 0.27793534 0.2047646058
## Callimico_goeldii 0.1206171108 0.19300106 0.1198303287
## Callithrix_geoffroyi 0.0000000000 0.07238395 -0.0007867821
## Callithrix_jacchus -0.0723839496 0.00000000 -0.0731707317
## Cebuella_pygmaea 0.0007867821 0.07317073 0.0000000000
## Cercopithecus_neglectus Colobus_guereza Eulemur_macaco
## Alouatta_caraya 0.05357143 0.021907216 -0.175000000
## Ateles_geoffroyi 0.23089701 0.199232798 0.002325581
## Callimico_goeldii 0.14596273 0.114298521 -0.082608696
## Callithrix_geoffroyi 0.02534562 -0.006318590 -0.203225806
## Callithrix_jacchus -0.04703833 -0.078702540 -0.275609756
## Cebuella_pygmaea 0.02613240 -0.005531808 -0.202439024
## Galago_moholi Galago_senegalensis Gorilla_gorilla
## Alouatta_caraya 0.031250000 0.109848485 -0.08395522
## Ateles_geoffroyi 0.208575581 0.287174066 0.09337036
## Callimico_goeldii 0.123641304 0.202239789 0.00843608
## Callithrix_geoffroyi 0.003024194 0.081622678 -0.11218103
## Callithrix_jacchus -0.069359756 0.009238729 -0.18456498
## Cebuella_pygmaea 0.003810976 0.082409460 -0.11139425
## Hylobates_lar Lemur_catta Leontopithecus_chrysomelas
## Alouatta_caraya -0.03409091 -0.03073770 0.007352941
## Ateles_geoffroyi 0.14323467 0.14658788 0.184678523
## Callimico_goeldii 0.05830040 0.06165360 0.099744246
## Callithrix_geoffroyi -0.06231672 -0.05896351 -0.020872865
## Callithrix_jacchus -0.13470067 -0.13134746 -0.093256815
## Cebuella_pygmaea -0.06152993 -0.05817673 -0.020086083
## Leontopithecus_rosalia Macaca_fascicularis Macaca_fuscata
## Alouatta_caraya 0.002777778 0.07094595 0.031976744
## Ateles_geoffroyi 0.180103359 0.24827153 0.209302326
## Callimico_goeldii 0.095169082 0.16333725 0.124368049
## Callithrix_geoffroyi -0.025448029 0.04272014 0.003750938
## Callithrix_jacchus -0.097831978 -0.02966381 -0.068633012
## Cebuella_pygmaea -0.024661247 0.04350692 0.004537720
## Macaca_nigra Macaca_silenus Mandrillus_sphinx
## Alouatta_caraya 0.097222222 0.036764706 0.036111111
## Ateles_geoffroyi 0.274547804 0.214090287 0.213436693
## Callimico_goeldii 0.189613527 0.129156010 0.128502415
## Callithrix_geoffroyi 0.068996416 0.008538899 0.007885305
## Callithrix_jacchus -0.003387534 -0.063845050 -0.064498645
## Cebuella_pygmaea 0.069783198 0.009325681 0.008672087
## Mico_argentatus Microcebus_murinus Nycticebus_coucang
## Alouatta_caraya 0.07500000 -0.12061404 0.04166667
## Ateles_geoffroyi 0.25232558 0.05671155 0.21899225
## Callimico_goeldii 0.16739130 -0.02822273 0.13405797
## Callithrix_geoffroyi 0.04677419 -0.14883984 0.01344086
## Callithrix_jacchus -0.02560976 -0.22122379 -0.05894309
## Cebuella_pygmaea 0.04756098 -0.14805306 0.01422764
## Nycticebus_pygmaeus Pan_troglodytes Papio_hamadryas
## Alouatta_caraya -0.14030612 -0.04342105 0.07645631
## Ateles_geoffroyi 0.03701946 0.13390453 0.25378189
## Callimico_goeldii -0.04791482 0.04897025 0.16884762
## Callithrix_geoffroyi -0.16853193 -0.07164686 0.04823050
## Callithrix_jacchus -0.24091588 -0.14403081 -0.02415345
## Cebuella_pygmaea -0.16774515 -0.07086008 0.04901729
## Propithecus_coquereli Saguinus_bicolor Saguinus_imperator
## Alouatta_caraya -0.03500000 0.08500000 0.12500000
## Ateles_geoffroyi 0.14232558 0.26232558 0.30232558
## Callimico_goeldii 0.05739130 0.17739130 0.21739130
## Callithrix_geoffroyi -0.06322581 0.05677419 0.09677419
## Callithrix_jacchus -0.13560976 -0.01560976 0.02439024
## Cebuella_pygmaea -0.06243902 0.05756098 0.09756098
## Saguinus_midas Saguinus_oedipus Saimiri_sciureus
## Alouatta_caraya 0.12500000 -0.04664179 0.04741379
## Ateles_geoffroyi 0.30232558 0.13068379 0.22473937
## Callimico_goeldii 0.21739130 0.04574951 0.13980510
## Callithrix_geoffroyi 0.09677419 -0.07486760 0.01918799
## Callithrix_jacchus 0.02439024 -0.14725155 -0.05319596
## Cebuella_pygmaea 0.09756098 -0.07408082 0.01997477
## Sapajus_apella Theropithecus_gelada Trachypithecus_auratus
## Alouatta_caraya 0.009615385 0.105769231 0.08796296
## Ateles_geoffroyi 0.186940966 0.283094812 0.26528854
## Callimico_goeldii 0.102006689 0.198160535 0.18035427
## Callithrix_geoffroyi -0.018610422 0.077543424 0.05973716
## Callithrix_jacchus -0.090994371 0.005159475 -0.01264679
## Cebuella_pygmaea -0.017823640 0.078330206 0.06052394
## Trachypithecus_cristatus Trachypithecus_francoisi
## Alouatta_caraya 0.08928571 -0.008333333
## Ateles_geoffroyi 0.26661130 0.168992248
## Callimico_goeldii 0.18167702 0.084057971
## Callithrix_geoffroyi 0.06105991 -0.036559140
## Callithrix_jacchus -0.01132404 -0.108943089
## Cebuella_pygmaea 0.06184669 -0.035772358
## Varecia_rubra Varecia_variegata
## Alouatta_caraya -0.06144068 -0.085526316
## Ateles_geoffroyi 0.11588490 0.091799266
## Callimico_goeldii 0.03095063 0.006864989
## Callithrix_geoffroyi -0.08966648 -0.113752122
## Callithrix_jacchus -0.16205043 -0.186136072
## Cebuella_pygmaea -0.08887970 -0.112965340
This section creates heatmaps to visualize pairwise differences in prevalence. Species labels are colored based on prevalence quantiles.
# Helper function to categorize species by quantile
categorize_species_by_quantile <- function(trait_data) {
# Calculate quantiles
top_trait <- quantile(trait_data$trait, 0.75, na.rm = TRUE)
bot_trait <- quantile(trait_data$trait, 0.25, na.rm = TRUE)
# Identify species in each quantile
list(
top_trait = trait_data$species[trait_data$trait >= top_trait],
bot_trait = trait_data$species[trait_data$trait <= bot_trait]
)
}
# Helper function to create colored labels
create_colored_labels <- function(species_list, quantile_cats) {
color_map <- c(
"Top" = "#D73027",
"Bottom" = "#1A9850"
)
sapply(species_list, function(sp) {
in_top_trait <- sp %in% quantile_cats$top_trait
in_bot_trait <- sp %in% quantile_cats$bot_trait
if (in_top_trait) {
paste0("<span style='color:", color_map["Top"], ";'>", sp, "</span>")
} else if (in_bot_trait) {
paste0("<span style='color:", color_map["Bottom"], ";'>", sp, "</span>")
} else {
sp
}
})
}
# Helper function to create legend plot
create_legend_plot <- function() {
legend_df <- tibble(
category = c("Top", "Bottom"),
color = c("#D73027", "#1A9850")
)
ggplot(legend_df, aes(x = 1, y = category, color = category)) +
geom_point(size = 0) +
scale_color_manual(
values = setNames(legend_df$color, legend_df$category),
guide = guide_legend(override.aes = list(size = 5))
) +
labs(color = "Trait Categories") +
theme_void() +
theme(
legend.position = "right",
legend.title = element_text(size = 12),
legend.text = element_text(size = 10)
)
}
# Helper function to create heatmap
create_heatmap <- function(pairwise_data, colored_labels,
highlight_nonoverlap = TRUE) {
# Ensure label mapping is stable across axes
if (is.null(names(colored_labels))) {
stop("colored_labels must be a named vector with species names.")
}
pairwise_data <- pairwise_data %>%
mutate(
species1 = factor(species1, levels = names(colored_labels)),
species2 = factor(species2, levels = names(colored_labels))
)
p <- ggplot(pairwise_data, aes(x = species2, y = species1, fill = abs(diff_value))) +
geom_tile(color = "black", size = 0.25) +
geom_text(aes(label = sprintf("%.2f", abs(diff_value))), size = 3) +
scale_fill_gradient(low = "white", high = "salmon3", name = "Difference") +
scale_x_discrete(labels = colored_labels) +
scale_y_discrete(labels = colored_labels) +
theme_minimal() +
theme(
axis.text.x = element_markdown(angle = 45, hjust = 1),
axis.text.y = element_markdown(angle = 0),
panel.grid = element_blank(),
legend.position = "none"
)
# Add bold border for non-overlapping CIs if requested
if (highlight_nonoverlap) {
p <- p +
geom_tile(data = filter(pairwise_data, !overlap_use),
fill = NA, color = "black", size = 1.5) +
labs(
x = "Species", y = "Species",
title = paste("Pairwise Differences in trait: ", capitalized_trait, " with Non-overlapping CIs", sep = ""),
caption = "Bold border: non-overlapping CIs."
)
} else {
p <- p +
labs(
x = "Species", y = "Species",
title = paste("Pairwise Differences in trait: ", capitalized_trait, sep = ""),
caption = "Bold border: non-overlapping CIs."
)
}
return(p)
}
# Prepare colored labels
quantile_cats <- categorize_species_by_quantile(trait_df)
my_labels <- unique(c(pairwise_data$species1, pairwise_data$species2))
my_labels_colored <- setNames(create_colored_labels(my_labels, quantile_cats), my_labels)
# Save colored labels
write.csv(my_labels_colored, file.path(ci_dir, "my_labels_colored.csv"),
quote = FALSE, row.names = TRUE)
# Create legend
legend_plot <- create_legend_plot()
# Create heatmaps with non-overlapping CI highlights
p_overlap <- create_heatmap(pairwise_data, my_labels_colored, highlight_nonoverlap = TRUE) +
legend_plot + plot_layout(ncol = 2, widths = c(1000, 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p_flat <- create_heatmap(pairwise_data, my_labels_colored, highlight_nonoverlap = FALSE) +
legend_plot + plot_layout(ncol = 2, widths = c(1000, 1))
# Save heatmaps
ggsave(file.path(ci_dir, "visual_matrix_no_overlap.png"), p_overlap,
width = 15, height = 12, dpi = "retina")
ggsave(file.path(ci_dir, "visual_matrix_flat.png"), p_flat,
width = 15, height = 12, dpi = "retina")
# Display plots
p_overlap
p_flat
This section creates readable summary tables for cancer traits and pairwise comparisons.
# Polished table for cancer traits
trait_ci_polished <- trait_ci %>%
mutate(
trait = round(trait, 3),
) %>%
dplyr::select(species, n_population, n_observation,
trait, trait_ci_lb, trait_ci_ub, trait_ci) %>%
dplyr::rename(
"Trait" = trait,
"Trait CI Lower Bound" = trait_ci_lb,
"Trait CI Upper Bound" = trait_ci_ub,
"Trait CI" = trait_ci
)
write.csv(trait_ci_polished,
file.path(ci_dir, "trait_CI_polished.csv"),
quote = FALSE, row.names = FALSE)
# Polished pairwise table - recreate from original data with all CI columns
trait_diff <- trait_ci %>%
dplyr::select(species, n_population , n_observation, trait,
trait_ci_lb, trait_ci_ub, trait_ci)
# Generate all pairwise species combinations
pairwise_data_full <- expand.grid(species1 = trait_diff$species,
species2 = trait_diff$species,
stringsAsFactors = FALSE) %>%
left_join(trait_diff %>% rename_with(~paste0(.x, "1"), .cols = -species),
by = c("species1" = "species")) %>%
left_join(trait_diff %>% rename_with(~paste0(.x, "2"), .cols = -species),
by = c("species2" = "species"))
# Create polished pairwise table
pairwise_data_end_polished <- pairwise_data_full %>%
dplyr::mutate(
trait_diff = abs(trait1 - trait2),
trait_diff = round(trait_diff, 3),
# Format CIs for both Wilson and Jeffreys methods
trait_CI1 = sprintf("[%.2f-%.2f]",
as.numeric(trait_ci_lb1),
as.numeric(trait_ci_ub1)),
trait_CI2 = sprintf("[%.2f-%.2f]",
as.numeric(trait_ci_lb2),
as.numeric(trait_ci_ub2)),
diff_value = trait_diff
) %>%
dplyr::select(species1, species2,
trait_diff, trait_CI1, trait_CI2) %>%
dplyr::rename(
"Trait Difference" = trait_diff,
"Trait CI1" = trait_CI1,
"Trait CI2" = trait_CI2
)
write.csv(pairwise_data_end_polished,
file.path(ci_dir, "pairwise_data_end_polished.csv"),
quote = FALSE, row.names = FALSE)
# Display preview
head(trait_ci_polished)
## # A tibble: 6 × 7
## species n_population n_observation Trait `Trait CI Lower Bound`
## <chr> <int> <int> <dbl> <dbl>
## 1 Alouatta_caraya 32 4 0.125 0.0335
## 2 Ateles_geoffroyi 43 13 0.302 0.176
## 3 Callimico_goeldii 69 15 0.217 0.128
## 4 Callithrix_geoffroyi 31 3 0.097 0.0181
## 5 Callithrix_jacchus 41 1 0.024 0.0000470
## 6 Cebuella_pygmaea 41 4 0.098 0.0255
## # ℹ 2 more variables: `Trait CI Upper Bound` <dbl>, `Trait CI` <chr>
if (length(phylo_debug_log) > 0) {
cat("### Debug log\n")
cat(paste0("[DEBUG] ", phylo_debug_log, "\n"))
}
[DEBUG] args = cancer_traits_processed-LQ.csv | science.abn7829_data_s4.nex.tree | data_exploration | 1998 | primates | family | neoplasia_prevalence | adult_necropsy_count | neoplasia_necropsy | /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv | malignant_prevalence | LQ [DEBUG] seed_val = 1998 [DEBUG] workingDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/dc/93da68129eef643ee49a40481df3df [DEBUG] objDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/dc/93da68129eef643ee49a40481df3df/obj [DEBUG] resultsDir = data_exploration [DEBUG] Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/dc/93da68129eef643ee49a40481df3df [DEBUG] Results Directory: data_exploration [DEBUG] data_exploration_dir = data_exploration/1.Data-exploration [DEBUG] species_distribution_dir = data_exploration/1.Data-exploration/1.Species_distribution [DEBUG] extreme_plots_dir = data_exploration/1.Data-exploration/2.Extreme_plots [DEBUG] phylo_distribution_dir = data_exploration/1.Data-exploration/3.Phylogenetic_distribution [DEBUG] ci_dir = data_exploration/1.Data-exploration/4.CI_overlaps [DEBUG] palettes loaded: primates=15, primates_dark=15, mammals=17 [DEBUG] trait_path = cancer_traits_processed-LQ.csv [DEBUG] trait_df rows = 47, cols = 19 [DEBUG] trait_df columns: tax_id, species, common_name, family, adult_necropsy_count, neoplasia_necropsy, neoplasia_prevalence, benign_count, benign_prevalence, malignant_count, malignant_prevalence, body_mass, mass_g, log_body_mass, mature_f, mature_m, mature_age, MLS.anage, LQ [DEBUG] trait_df species unique = 47 [DEBUG] tree_path = science.abn7829_data_s4.nex.tree [DEBUG] tree tips = 236, nodes = 235 [DEBUG] clade_name = primates [DEBUG] taxon_of_interest = family [DEBUG] trait = neoplasia_prevalence [DEBUG] n_trait = neoplasia_necropsy [DEBUG] p_trait = adult_necropsy_count [DEBUG] has.p = TRUE, p missing = 0 [DEBUG] has.n = TRUE, n missing = 0 [DEBUG] tax_id_file = /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv [DEBUG] tax_id_df rows = 1290, distinct taxa = 807 [DEBUG] has.TAX_ID = TRUE [DEBUG] trait_df merged with tax_id_df: rows = 47, missing tax_id = 0 [DEBUG] normalized tax_id from merged columns, missing tax_id = 0 [DEBUG] tree_ids rows = 40, missing tax_id = 0 [DEBUG] common_tax_ids = 40 [DEBUG] pruned_tree tips (TAX_ID) = 40, nodes = 39 [DEBUG] trait_df after TAX_ID tree filter rows = 40 [DEBUG] secondary_trait = malignant_prevalence [DEBUG] has.secondary = TRUE, missing = 0 [DEBUG] branch_trait = LQ [DEBUG] has.branch = TRUE, missing = 0 [DEBUG] createDir: created data_exploration/1.Data-exploration/4.CI_overlaps
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-conda-linux-gnu
## Running under: TUXEDO OS
##
## Matrix products: default
## BLAS/LAPACK: /home/miguel/micromamba/envs/caas_global_cancer/lib/libopenblasp-r0.3.28.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=es_ES.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Madrid
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggtext_0.1.2 patchwork_1.3.0 ggplot2_3.5.1 tibble_3.2.1
## [5] ape_5.8-1 tidyr_1.3.1 dplyr_1.1.4 readr_2.1.5
##
## loaded via a namespace (and not attached):
## [1] fastmatch_1.1-6 gtable_0.3.6 xfun_0.51
## [4] bslib_0.9.0 lattice_0.22-6 tzdb_0.5.0
## [7] numDeriv_2016.8-1.1 quadprog_1.5-8 vctrs_0.6.5
## [10] tools_4.4.1 generics_0.1.3 parallel_4.4.1
## [13] pkgconfig_2.0.3 Matrix_1.7-3 scatterplot3d_0.3-44
## [16] lifecycle_1.0.4 binom_1.1-1.1 stringr_1.5.1
## [19] compiler_4.4.1 farver_2.1.2 textshaping_1.0.0
## [22] munsell_0.5.1 mnormt_2.1.1 combinat_0.0-8
## [25] codetools_0.2-20 litedown_0.6 htmltools_0.5.8.1
## [28] maps_3.4.2.1 sass_0.4.9 yaml_2.3.10
## [31] pillar_1.10.1 crayon_1.5.3 jquerylib_0.1.4
## [34] MASS_7.3-65 cachem_1.1.0 clusterGeneration_1.3.8
## [37] iterators_1.0.14 foreach_1.5.2 nlme_3.1-167
## [40] phangorn_2.12.1 commonmark_1.9.5 tidyselect_1.2.1
## [43] digest_0.6.37 stringi_1.8.4 purrr_1.0.4
## [46] labeling_0.4.3 fastmap_1.2.0 grid_4.4.1
## [49] colorspace_2.1-1 expm_1.0-0 cli_3.6.4
## [52] magrittr_2.0.3 optimParallel_1.0-2 utf8_1.2.4
## [55] withr_3.0.2 scales_1.3.0 DEoptim_2.2-8
## [58] rmarkdown_2.29 igraph_2.1.4 ragg_1.3.3
## [61] hms_1.1.3 coda_0.19-4.1 evaluate_1.0.3
## [64] knitr_1.50 phytools_2.4-4 doParallel_1.0.17
## [67] markdown_2.0 rlang_1.1.5 gridtext_0.1.5
## [70] Rcpp_1.0.14 glue_1.8.0 xml2_1.3.8
## [73] jsonlite_1.9.1 R6_2.6.1 systemfonts_1.2.1